Libraries

library(httr)
library(lubridate)
library(ggplot2)
library(gganimate)
library(ggrepel)
library(patchwork)
library(data.table)
library(broom)
library(rgdal)
require(maptools)
require(rgeos)

options(scipen=2)

Source: https://opendata.ecdc.europa.eu/covid19/

Load and process map and current data

tf <- "~/covid/global.csv"
# GET("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", authenticate(":", ":", type="ntlm"), write_disk(tf, overwrite = TRUE))
globcases <- fread(tf)
setnames(globcases, c('dateRep', 'day', 'month', 'year', 'new_Cases', 'new_Deaths', 'Country', 'geoId', 'countryCode', 'population', 'continent', 'roll_norm_Cases'))
globcases[, Date := as.Date(dmy(dateRep))]
globcases <- globcases[, .(Country, countryCode, continent, population, Date, new_Cases, new_Deaths)]
# str(globcases)

## Create year-week field, to enable smoother merging with the other tables, as they can have NA dates.
# Start with separate fields, in case I need to add tweaks to them.
globcases[, year := strftime(globcases$Date, format='%Y')]
globcases[, week := as.integer(strftime(globcases$Date, format='%V'))]
globcases[, year_week := paste0(year, '-W', week)]
globcases[, year := NULL]
globcases[, week := NULL]

# Remove the cruise ship entry
globcases <- globcases[Country != 'Cases_on_an_international_conveyance_Japan', ]

# Some countries lag by a day in entering their data. So if I use the latest date I miss some countries,
# if I use the latest available entry, the countries are out of sync. Determine range, and as long as it's less than a couple days, take the latest common date as the "current" date.
end_dates <- globcases[, max(Date), by=Country]

if ( min(unique(end_dates$V1)) - today() >= -3) { # allow for lack of updates over weekends
  message('Entries for some countries lag by a couple days. Continuing with last common date.')
  current <- min(unique(end_dates$V1))
} else {
  message('Entries for some countries seem to have stopped. Continuing with latest available entry date.')
  current <- max(unique(end_dates$V1))
}
## Entries for some countries lag by a couple days. Continuing with last common date.
message(paste('Last entry date used:', current, '.'))
## Last entry date used: 2020-11-14 .
tf <- "~/covid/testing.csv"
# GET("https://opendata.ecdc.europa.eu/covid19/testing/csv", authenticate(":", ":", type="ntlm"), write_disk(tf, overwrite = TRUE))
tests <- fread(tf)
setnames(tests, c('Country', 'countryCode', 'year_week', 'test_Cases', 'new_Tests', 'population', 'test_Rate', 'pos_Rate', 'source'))
tests <- tests[, .(Country, year_week, new_Tests)]
# str(tests)
## The UK has been mistyped in some entries, so that needs fixing to enable better correct merging of the tables.
tests[Country=='United Kingdom', Country := 'United_Kingdom']
## Fix inconsistent formatting that is messing up the mergers
tests[, year_week:= sub('W0', 'W', year_week)]

tf <- "~/covid/hospitalizations.csv"
# GET("https://opendata.ecdc.europa.eu/covid19/hospitalicuadmissionrates/csv", authenticate(":", ":", type="ntlm"), write_disk(tf, overwrite = TRUE))
icu <- fread(tf)
setnames(icu, c('Country', 'Type', 'Date', 'year_week', 'Value', 'source', 'URL'))
## The UK has been mistyped in some entries, so that needs fixing to enable better correct merging of the tables.
icu[Country=='United Kingdom', Country := 'United_Kingdom']
icu[, Date := as.Date(Date)]
## Fix inconsistent formatting that is messing up the mergers
icu[, year_week:= sub('W0', 'W', year_week)]

## Split daily and weekly metrics that are wreaking havoc on my mergers.
occup <- unique(icu[grepl('occupancy', Type, fixed=TRUE), .(Country, Date, Type, Value)])
occup <- dcast(occup, Country + Date ~ Type, value.var = 'Value')
setnames(occup, c('Country', 'Date', 'occup_ICU', 'occup_Hosp'))
admit <- unique(icu[grepl('admissions', Type, fixed=TRUE), .(Country, year_week, Type, Value)])
admit <- dcast(admit, Country + year_week ~ Type, value.var = 'Value')
setnames(admit, c('Country', 'year_week', 'norm_new_ICU', 'norm_new_Hosp'))
admit[, norm_new_ICU := norm_new_ICU * 10 ]   # re-normalize to 1M instead of 100K, to be same as my other norms
admit[, norm_new_Hosp := norm_new_Hosp * 10 ]
# str(icu)
# Marry the tables
DT <- merge(merge(globcases, occup, by=c('Country', 'Date'), all=TRUE), 
            merge(admit, tests, by=c('Country', 'year_week'), all=TRUE), 
            by=c('Country', 'year_week'), all=TRUE)
# View(merge(globcases, occup, by=c('Country', 'Date'), all=TRUE)[Country=='Denmark',])
# View(merge(admit, tests, by=c('Country', 'year_week'), all=TRUE)[Country=='Denmark',])
# View(DT[Country=='Denmark',])
# str(DT)

## Some country codes are NA in some rows. Try to fill in the missing values.
DT[, countryCode := .SD$countryCode[!is.na(.SD$countryCode)][1], by=Country]

# Sort newest last, for cumulative metrics.
setorder(DT, Country, Date)

## Missing days break the cumulative sums
DT[is.na(new_Cases), new_Cases := 0]
DT[is.na(new_Deaths), new_Deaths := 0]
DT[is.na(new_Tests), new_Tests := 0]
DT[is.na(norm_new_Hosp), norm_new_Hosp := 0]
DT[is.na(norm_new_ICU), norm_new_ICU := 0]

# Total Cases and Deaths
DT[, total_Cases := cumsum(new_Cases), by=Country]
DT[, total_Deaths := cumsum(new_Deaths), by=Country]
DT[, total_Tests := cumsum(new_Tests), by=Country]

# Sliding window cumulative cases in a W-days window.
W <- 7
DT[, roll_Cases := frollsum(new_Cases, n=W, align='right', na.rm=TRUE), by=Country]
DT[, roll_Deaths := frollsum(new_Deaths, n=W, align='right', na.rm=TRUE), by=Country]
DT[, roll_Tests := frollsum(new_Tests, n=W, align='right', na.rm=TRUE), by=Country]

# Population normalisations to P citizens
P <- 1e6
DT[, norm_new_Cases := new_Cases / population * P, by=Country]
DT[, norm_new_Deaths := new_Deaths / population * P, by=Country]
DT[, norm_new_Tests := new_Tests / population * P, by=Country]
# DT[is.na(norm_new_Cases), norm_new_Cases := 0]
# DT[is.na(norm_new_Deaths), norm_new_Deaths := 0]
# DT[is.na(norm_new_Tests), norm_new_Tests := 0]

DT[, norm_tot_Cases := total_Cases / population * P, by=Country]
DT[, norm_tot_Deaths := total_Deaths / population * P, by=Country]
DT[, norm_tot_Tests := total_Tests / population * P, by=Country]
DT[, norm_tot_ICU := cumsum(norm_new_ICU), by=Country]
DT[, norm_tot_Hosp := cumsum(norm_new_Hosp), by=Country]
# DT[is.na(norm_tot_Cases), norm_tot_Cases := 0]
# DT[is.na(norm_tot_Deaths), norm_tot_Deaths := 0]
# DT[is.na(norm_tot_Tests), norm_tot_Tests := 0]
# DT[is.na(norm_tot_ICU), norm_tot_ICU := 0]
# DT[is.na(norm_tot_Hosp), norm_tot_Hosp := 0]

DT[, norm_roll_Cases := roll_Cases / population * P, by=Country]
DT[, norm_roll_Deaths := roll_Deaths / population * P, by=Country]
DT[, norm_roll_Tests := roll_Tests / population * P, by=Country]
DT[, norm_roll_ICU := frollsum(norm_new_ICU, n=W, align='right', na.rm=TRUE), by=Country]
DT[, norm_roll_Hosp := frollsum(norm_new_Hosp, n=W, align='right', na.rm=TRUE), by=Country]

# Severity rates
DT[, Mortality := total_Deaths / total_Cases]

DT[, roll_TestCase_Rate := norm_roll_Cases / norm_roll_Tests]
DT[, roll_HospICU_Rate := norm_roll_ICU / norm_roll_Hosp]
DT[, roll_ICUDeath_Rate := norm_roll_Deaths / norm_roll_ICU]

# Global
DT[, gNew_Cases := sum(new_Cases, na.rm=TRUE), by=Date]
DT[, gNew_Deaths := sum(new_Deaths, na.rm=TRUE), by=Date]
DT[, gNew_Tests := sum(new_Tests, na.rm=TRUE), by=Date]
DT[, gTotal_Cases := sum(total_Cases, na.rm=TRUE), by=Date]
DT[, gTotal_Deaths := sum(total_Deaths, na.rm=TRUE), by=Date]
DT[, gTotal_Tests := sum(total_Tests, na.rm=TRUE), by=Date]
DT[, gRoll_Cases := sum(roll_Cases, na.rm=TRUE), by=Date]
DT[, gRoll_Deaths := sum(roll_Deaths, na.rm=TRUE), by=Date]
DT[, gRoll_Tests := sum(roll_Tests, na.rm=TRUE), by=Date]
DT[, gMortality := gTotal_Deaths / gTotal_Cases]

# Rates of daily change
DT[, roll_Cases_Rate := frollapply(roll_Cases, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_Deaths_Rate := frollapply(roll_Deaths, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_Tests_Rate := frollapply(norm_roll_Tests, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_Hosp_Rate := frollapply(norm_roll_Hosp, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, roll_ICU_Rate := frollapply(norm_roll_ICU, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[!is.finite(roll_Cases_Rate), roll_Cases_Rate := 0]
DT[!is.finite(roll_Deaths_Rate), roll_Deaths_Rate := 0]
DT[!is.finite(roll_Tests_Rate), roll_Tests_Rate := 0]
DT[!is.finite(roll_Hosp_Rate), roll_Hosp_Rate := 0]
DT[!is.finite(roll_ICU_Rate), roll_ICU_Rate := 0]

DT[, gRoll_Cases_Rate := frollapply(gRoll_Cases, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[, gRoll_Deaths_Rate := frollapply(gRoll_Deaths, n=2, FUN = function(x){x[2] / x[1]}, align="right"), by=Country]
DT[!is.finite(gRoll_Cases_Rate), gRoll_Cases_Rate := 0]
DT[!is.finite(gRoll_Deaths_Rate), gRoll_Deaths_Rate := 0]

# Compare severity to one country.
homecountry = 'Austria'
# Select countries
topInterest <- c('Austria', 'Italy', 'Greece', 'Luxembourg', 'Germany')
neighbour_countries <- c('Switzerland', 'Slovakia', 'Slovenia', 'Czechia', 'Hungary')
other_countries <- c('United_Kingdom', 'Spain', 'France', 'Belgium', 'United_States_of_America', 'Sweden', 'South_Korea')
# Download the shape file from the web and unzip it:
# download.file("http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip", destfile="~/covid/shape_files/world_shape_file.zip")
# system("unzip ~/covid/shape_files/world_shape_file.zip")
world_spdf <- readOGR(dsn='~/covid/shape_files/world_shape_file', layer='TM_WORLD_BORDERS_SIMPL-0.3')
## OGR data source with driver: ESRI Shapefile 
## Source: "D:\Documents\covid\shape_files\world_shape_file", layer: "TM_WORLD_BORDERS_SIMPL-0.3"
## with 246 features
## It has 11 fields
## Integer64 fields read as strings:  POP2005
# Dataframe-ize map.
world_df <- as.data.table(tidy(world_spdf, region="ISO3"))
## SpP is invalid
# For plottting, the order of rows is super important. 
# Merge operations later can change the order, so I need to be able to recover it.
world_df[, ord := 1:nrow(world_df)]

The Covid19 dataset from ECDC comes without geospacial data. The geospacial data available from other sources may not be the most up-to-date with recognised countries and names.

message(paste(sum(!(world_spdf$ISO3 %in% DT$countryCode)), 
                            "countries in the map file do not correspond to an entry in the Covid19 data."))
## 41 countries in the map file do not correspond to an entry in the Covid19 data.
print(world_spdf$ISO3[! world_spdf$ISO3 %in% DT$countryCode])
##  [1] "ASM" "COK" "GUF" "FSM" "PRK" "KIR" "MTQ" "MSR" "NIU" "HKG" "MAC" "MYT"
## [13] "ALA" "NFK" "CCK" "ATA" "BVT" "ATF" "HMD" "IOT" "CXR" "UMI" "NRU" "REU"
## [25] "TKL" "TON" "TUV" "TKM" "WLF" "WSM" "GLP" "ANT" "PCN" "PLW" "SPM" "SHN"
## [37] "SJM" "MAF" "BLM" "SGS" "TWN"
outgroup <- unique(DT[!(countryCode %in% world_spdf$ISO3), .(Country, countryCode)])
message(paste(nrow(outgroup),
                            "countries in the Covid19 data do not correspond to an entity in the map file:"))
## 8 countries in the Covid19 data do not correspond to an entity in the map file:
print(outgroup)
##                              Country countryCode
## 1: Bonaire, Saint Eustatius and Saba         BES
## 2:                          Curaçao         CUW
## 3:                            Kosovo         XKX
## 4:                        Montserrat         MSF
## 5:                      Sint_Maarten         SXM
## 6:                       South_Sudan         SSD
## 7:                            Taiwan     CNG1925
## 8:                 Wallis_and_Futuna

I think for a global overview map, the missing countries will not make a big difference.

Status

case_col = '#FF0000'
death_col = '#0055FF'
hosp_col = '#555500'
icu_col = '#995500'
test_col = '#005500'

print( data.frame(DaysTracked = length(unique(DT$Date)),
                    CountriesTracked = length(unique(DT$Country)) ) )
##   DaysTracked CountriesTracked
## 1         322              213
print( data.frame( Global_Cases = sum(DT$new_Cases, na.rm = TRUE),
                    Global_Deaths = sum(DT$new_Deaths, na.rm = TRUE),
                    Global_Mortality = sum(DT$new_Deaths, na.rm = TRUE) / sum(DT$new_Cases, na.rm = TRUE) ) )
##   Global_Cases Global_Deaths Global_Mortality
## 1     54109365       1313828       0.02428097

World worst and best

subDT <- DT[Date==current, , by=Country][, .(Country, norm_roll_Cases)]
# Worst
tail(subDT, n=10)
##               Country norm_roll_Cases
##  1:           Uruguay    148.19175725
##  2:        Uzbekistan     46.72285841
##  3:           Vanuatu              NA
##  4:         Venezuela     64.35022457
##  5:           Vietnam      0.44577089
##  6: Wallis_and_Futuna              NA
##  7:    Western_Sahara      0.00000000
##  8:             Yemen      0.06858259
##  9:            Zambia     15.34065721
## 10:          Zimbabwe     20.07446260
# Best
head(subDT, n=10)
##                 Country norm_roll_Cases
##  1:         Afghanistan        26.33948
##  2:             Albania      1219.59442
##  3:             Algeria       120.20053
##  4:             Andorra      7745.11992
##  5:              Angola        30.00757
##  6:            Anguilla         0.00000
##  7: Antigua_and_Barbuda        30.89121
##  8:           Argentina      1508.73117
##  9:             Armenia      3923.95785
## 10:               Aruba       677.26460
subDT <- DT[Date==current, , by=Country][, norm_roll_Deaths]
# Worst
tail(subDT, n=10)
##  [1] 0.86661846 0.45479745         NA 0.70136485 0.00000000         NA
##  [7] 0.00000000 0.03429129 0.05598780 0.47796340
# Best
head(subDT, n=10)
##  [1]  1.2880583 19.5638177  2.3459428  0.0000000  0.5027447  0.0000000
##  [7] 10.2970705 42.6299961 69.3099568 37.6258113

Europe worst and best

subDT <- DT[continent=='Europe', max(norm_roll_Cases, na.rm=TRUE), by=Country][order(V1),]
setnames(subDT, c('Country', 'norm_roll_Cases'))
# Worst
tail(subDT, n=10)
##           Country norm_roll_Cases
##  1:    San_Marino        6095.260
##  2: Liechtenstein        6175.413
##  3:      Slovenia        6341.943
##  4:   Switzerland        6721.496
##  5:    Montenegro        7068.671
##  6:    Luxembourg        7804.279
##  7:       Czechia        8459.032
##  8:       Belgium        9868.518
##  9:       Andorra       10331.202
## 10:      Holy_See       17177.914
# Best
head(subDT, n=10)
##        Country norm_roll_Cases
##  1:    Finland        311.1680
##  2:     Norway        768.1751
##  3:    Belarus        802.6525
##  4:     Jersey        983.3389
##  5:     Russia       1024.3003
##  6: Azerbaijan       1065.6150
##  7:    Estonia       1089.2046
##  8:     Latvia       1229.1872
##  9:    Albania       1232.5205
## 10:    Denmark       1370.1152

Reported events in last 7 days

case_col = '#FF0000'
death_col = '#0055FF'
hosp_col = '#555500'
icu_col = '#995500'
test_col = '#005500'

subDT <- merge(world_df, DT[Date==current, ], by.x='id', by.y='countryCode', all=TRUE)
setorder(subDT, ord)

Per 1M residents.

p1 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Cases)) +
    geom_polygon(colour='black', size=0.2) +
    scale_fill_gradient(high=case_col, low='white') +
    coord_cartesian(ylim=c(-60,80)) +
    theme_void() +
    theme(panel.background = element_rect(fill='#BBFFFF'))

p2 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Deaths)) +
    geom_polygon(colour='black', size=0.2) +
    scale_fill_gradient(high=death_col, low='white') +
    coord_cartesian(ylim=c(-60,80)) +
    theme_void() +
    theme(panel.background = element_rect(fill='#BBFFFF'))

print( p1 / p2 )

ggsave('~/covid/global_roll.png', plot = last_plot(), scale = 1,
       width = 30, height = 30, units = "cm", dpi = 300, limitsize = TRUE)
p1 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Cases)) +
    geom_polygon(colour='black', size=0.2) +
    scale_fill_gradient(high=case_col, low='white') +
  coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
    theme_void() +
    theme(panel.background = element_rect(fill='#BBFFFF'))

p2 <- ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Deaths)) +
    geom_polygon(colour='black', size=0.2) +
    scale_fill_gradient(high=death_col, low='white') +
  coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
    theme_void() +
    theme(panel.background = element_rect(fill='#BBFFFF'))

print( p1 / p2 )

ggsave('~/covid/eu_roll.png', plot = last_plot(), scale = 1,
       width = 21, height = 36, units = "cm", dpi = 300, limitsize = TRUE)
# Testing data seems to lag in date yet a bit more than case data.
subDT <- merge(world_df, DT[Date==current-1, ], by.x='id', by.y='countryCode', all=TRUE)
setorder(subDT, ord)

if(any(is.finite(subDT$norm_roll_Tests) & subDT$norm_roll_Tests > 0)){
  print(
    ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Tests)) +
        geom_polygon(colour='black', size=0.2) +
        scale_fill_gradient(high=test_col, low='white') +
      coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
        theme_void() +
        theme(panel.background = element_rect(fill='#BBFFFF'))
  )
  
  ggsave('~/covid/ertest.png', plot = last_plot(), scale = 1,
       width = 21, height = 18, units = "cm", dpi = 300, limitsize = TRUE)
}

if(any(!is.na(subDT$norm_roll_Hosp) & subDT$norm_roll_Hosp > 0)){
  print(
    ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_Hosp)) +
        geom_polygon(colour='black', size=0.2) +
        scale_fill_gradient(high=hosp_col, low='white') +
      coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
        theme_void() +
        theme(panel.background = element_rect(fill='#BBFFFF'))
  )

    ggsave('~/covid/erhosp.png', plot = last_plot(), scale = 1,
       width = 21, height = 18, units = "cm", dpi = 300, limitsize = TRUE)
}

if(any(!is.na(subDT$norm_roll_ICU) & subDT$norm_roll_ICU > 0)){
  print(
    ggplot(subDT, aes(x=long, y=lat, group=group, fill=norm_roll_ICU)) +
        geom_polygon(colour='black', size=0.2) +
        scale_fill_gradient(high=icu_col, low='white') +
      coord_cartesian(xlim=c(-10, 50), ylim=c(30,70)) +
        theme_void() +
        theme(panel.background = element_rect(fill='#BBFFFF'))
  )
  
  ggsave('~/covid/ericu.png', plot = last_plot(), scale = 1,
       width = 21, height = 18, units = "cm", dpi = 300, limitsize = TRUE)
}

Countries of personal interest

setorder(DT, Country, Date)

Normalized to 10^{6} residents.

In last 7 days

DT[Date==current & Country %in% topInterest, .(Country, norm_roll_Cases, norm_roll_Deaths, norm_roll_Tests, norm_roll_Hosp, norm_roll_ICU)]
##       Country norm_roll_Cases norm_roll_Deaths norm_roll_Tests norm_roll_Hosp
## 1:    Austria        5917.297         31.49420        22527.61              0
## 2:    Germany        1578.767         13.87631        18876.15              0
## 3:     Greece        1624.396         26.29469        13830.54              0
## 4:      Italy        4052.748         58.00242        23306.92              0
## 5: Luxembourg        5557.963         50.49732       110460.44              0
##    norm_roll_ICU
## 1:       0.00000
## 2:       0.00000
## 3:      14.26627
## 4:       0.00000
## 5:       0.00000

Since the beginning

DT[Date==current & Country %in% topInterest, .(Country, norm_tot_Cases, norm_tot_Deaths, norm_tot_Tests, norm_tot_Hosp, norm_tot_ICU)]
##       Country norm_tot_Cases norm_tot_Deaths norm_tot_Tests norm_tot_Hosp
## 1:    Austria      21563.252       177.67694        1836123         0.000
## 2:    Germany       9317.795       149.09802        2104606      2775.996
## 3:     Greece       6496.746        92.96385        1274111         0.000
## 4:      Italy      18345.118       731.26793        1975683         0.000
## 5: Luxembourg      41078.753       343.70755       13094373         0.000
##    norm_tot_ICU
## 1:       0.0000
## 2:       0.0000
## 3:     584.4508
## 4:       0.0000
## 5:       0.0000

Timeline

## TODO: Add ICU/Hosp to tidyDT

# Numbers over time
tidyDT <- melt(DT[, .(Date, Country, 
                      norm_new_Cases, norm_new_Deaths, norm_new_Tests, #norm_new_Hosp, norm_new_ICU,
                      norm_tot_Cases, norm_tot_Deaths, norm_tot_Tests, #norm_tot_Hosp, norm_tot_ICU,
                      norm_roll_Cases, norm_roll_Deaths, norm_roll_Tests#, norm_roll_Hosp, norm_roll_ICU
                      )],
                                id.vars = c('Date', 'Country'), variable.name = 'Type', value.name = 'Normalized_count')
tidyDT[grepl('Death', Type), vsplit := 'Deaths']
tidyDT[grepl('Cases', Type), vsplit := 'Cases']
tidyDT[grepl('Tests', Type), vsplit := 'Tests']
# tidyDT[grepl('Hosp', Type), vsplit := 'Hosp']
# tidyDT[grepl('ICU', Type), vsplit := 'ICU']
tidyDT[, hsplit := sub('_Cases|_Deaths|_Tests|_Hosp|_ICU', '', sub('norm_', '', Type), perl=TRUE)]
setkey(tidyDT, Country)

# Rolling VS Total (exponential-ness)
expDT <- DT[, .(Date, Country, norm_tot_Cases, norm_tot_Deaths, norm_roll_Cases, norm_roll_Deaths)]
tmptot <- melt(expDT, id.vars = c('Date', 'Country'), measure.vars = c('norm_tot_Cases', 'norm_tot_Deaths'), variable.name = 'Type', value.name = 'norm_Total')
tmptot[grepl('Death', Type), vsplit := 'Deaths']
tmptot[!grepl('Death', Type), vsplit := 'Cases']
tmproll <- melt(expDT, id.vars = c('Date', 'Country'), measure.vars = c('norm_roll_Cases', 'norm_roll_Deaths'), variable.name = 'Type', value.name = 'norm_Roll')
tmproll[grepl('Death', Type), vsplit := 'Deaths']
tmproll[!grepl('Death', Type), vsplit := 'Cases']
expDT <- merge(tmproll, tmptot, by=c('Date','Country', 'vsplit'))
setkey(expDT, Country)

# Rate of change
rateDT <- DT[, .(Date, Country, roll_Cases_Rate, roll_Deaths_Rate)]
rateDT <- melt(rateDT, id.vars = c('Date', 'Country'), value.name = 'daily_Rate', variable.name = 'Type')
rateDT[grepl('Death', Type), vsplit := 'Deaths']
rateDT[!grepl('Death', Type), vsplit := 'Cases']
setkey(rateDT, Country)

# Singular plots by information type.
relative_plot <- function(df=expDT, sel_country, title='') {
  ggplot(df, aes(x=norm_Total, y=norm_Roll, group=Country, colour=vsplit)) +
      facet_grid(vsplit ~ ., scales='free_y') +
        geom_line(colour='black', alpha=0.1, size=0.2) +
        geom_line(data=df[sel_country,], size=0.8) +
        scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
        scale_x_log10() +
        scale_y_log10() +
        coord_cartesian(xlim=c(1,NA), ylim=c(1,NA)) +
        annotation_logticks(sides='lb') +
        labs(title=title) +
        theme_bw() +
    theme(panel.grid=element_blank(),
          legend.position = 'none')
}

numbers_plot <- function(df=tidyDT, sel_country, title='', linear=FALSE) {
  p1 <- ggplot(df, aes(x=Date, y=Normalized_count, group=Country, colour=vsplit)) +
        facet_grid(vsplit ~ hsplit, scales = 'free_y') +
        # geom_line(colour='#000000', alpha=0.1, size=0.2) +  ## Plotting all countries as background for 9 facets
                                                          ## demands too much memory and crashes.
        geom_line(data=df[sel_country,], size=0.8) +
        scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
        # coord_cartesian(ylim=c(1,NA)) +
        labs(title=title, y='', x='') +
        theme_bw() + 
        theme(legend.position = 'none',
                    panel.grid = element_blank())
  
  if (linear) {
    p1
  } else {
    p1 + scale_y_log10() +
      annotation_logticks(sides='lr')
  }
}

rate_plot <- function(df=rateDT, sel_country, title='', D=W) {
    ggplot(df, aes(x=Date, y=daily_Rate, group=Country, colour=vsplit)) +
    facet_grid(vsplit ~ ., scales='free_y') +
        geom_hline(yintercept = 1, size=0.2) +
      geom_point(data=df[sel_country,], size=0.5) +
        geom_smooth(data=df[sel_country,], span=0.3, size=0.8) +
        scale_colour_manual(values=c(Deaths=death_col, Cases=case_col)) +
        coord_cartesian(ylim=c(0.6, 1.5)) +
        labs(title=title, x='', y='change_Ratio') +
        theme_bw() +
    theme(legend.position = 'none')
}

ratio_plot <- function(df=tidyDT, sel_country, ref_country=homecountry, title='') {
  ratioDT <- merge(df[sel_country], df[ref_country,], by=c('Date', 'Type'))
  ratioDT[, relative := Normalized_count.x / Normalized_count.y]
  ggplot(ratioDT[hsplit.y=='roll',], aes(x=Date, y=relative, colour=vsplit.y)) +
        facet_grid(vsplit.y ~ hsplit.y, scales = 'free_y') +
        geom_hline(yintercept = 1, size=0.15) +
        geom_point(size=0.5) +
      geom_smooth(span=0.3, size=0.75) +
        scale_y_continuous(trans='log2', breaks=c(1/64, 1/16, 1/4, 1, 4, 16, 64)) +
        scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
        coord_cartesian(ylim=c(1/33, 33)) +
        # annotation_logticks(sides='lr') +
        labs(title=title, y='', x='') +
        theme_bw() + 
        theme(legend.position = 'none',
                    axis.text.y.left = element_text())
}

## TODO : Add Hosp/Test, ICU/Hosp, Death/ICU rates to more_plots()

# Collections of singular plots.
more_plots <- function(numbers=tidyDT, exponential=expDT, rates=rateDT, sel_country, ref_country=homecountry){

  cat(sel_country)
  p1 <- relative_plot(exponential, sel_country, 
                      title=paste(sel_country, ': ', W, '-day rolling sum, per', P/1e6, 'M residents, as function of total'))
    p2 <- numbers_plot(numbers[,], sel_country, 
                       title=paste(sel_country, ': new, total &', W, '-day rolling sum, normalized to', P/1e6, 'M residents'))
    p3 <- rate_plot(rates, sel_country,
                    title=paste(sel_country, ': Daily change rate of', W, '-day roll. sum'))
    p4 <- ratio_plot(numbers, sel_country, ref_country,
                     title=paste(sel_country,':', W, '-day norm.roll. sum, relative to', ref_country))
    
    print( p1 / p2 / p3 / p4 )
}

fewer_plots <- function(numbers=tidyDT[hsplit=='roll',], exponential=expDT, rates=rateDT, sel_country, ref_country=homecountry){

  cat(sel_country)
  p1 <- relative_plot(exponential, sel_country,
                      title=paste(sel_country, ': ', W, '-day rolling sum, per', P/1e6, 'M residents, as function of total'))
    p2 <- numbers_plot(numbers, sel_country, linear=TRUE,
                       title=paste(sel_country, ': new, total &', W, '-day rolling sum, normalized to', P/1e6, 'M residents'))
    # p3 <- rate_plot(rates, sel_country,
                    # title=paste(sel_country, ': Daily change rate of', W, '-day roll. sum'))
  
    p4 <- ratio_plot(numbers, sel_country, ref_country, 
                     title=paste(sel_country,':', W, '-day norm.roll. sum, relative to', ref_country))
  
    print( p1 / p2 / p4)
}

# Easier comparison plots. Many countries by info type.

compare_numbers <- function(df=tidyDT, info, countries) {
  subdf <- df[Type %in% info & Country %in% countries, ]
  m1 <- max(subdf[Date==current, Normalized_count], na.rm = TRUE)
  
  p1 <- ggplot(subdf, aes(x=Date, y=Normalized_count, group=Country, colour=vsplit)) +
    geom_hline(yintercept=m1, size=0.25, colour='black', linetype='dotted') +
    geom_line() +
    geom_point(data=subdf[Normalized_count==m1 & Date==current,], colour='black') +
    facet_grid(Country ~ ., ) +
    scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
    theme_bw() +
    theme(legend.position = 'none')
  
  p2 <- p1 +   
    scale_y_log10() +
    annotation_logticks(side='lr') +
    coord_cartesian(ylim=c(1,NA))
  
  print((p1 + labs(y=paste(info, '(linear scale)'))) + 
        (p2 + labs(y=paste(info, '(log scale)'))))  
}

compare_exp <- function(df, type, info_x, info_y, countries) {
  subdf <- df[vsplit %in% type & Country %in% countries, ]
  m1 <- max(subdf[Date==current, c(info_y), with=FALSE], na.rm = TRUE)
  m2 <- max(subdf[Date==current, c(info_x), with=FALSE], na.rm = TRUE)
  
  p1 <- ggplot(subdf, aes_string(x=info_x, y=info_y, group='Country', colour='vsplit')) +
    geom_hline(yintercept=m1, size=0.25, colour='black', linetype='dotted') +
    geom_vline(xintercept=m2, size=0.25, colour='black', linetype='dotted') +
    geom_line() +
    geom_point(data=subdf[(subdf[[info_x]]==m2 | subdf[[info_y]]==m1) & Date==current,], colour='black') +
    facet_grid(Country ~ ., ) +
    scale_colour_manual(values=c(Deaths=death_col, Cases=case_col, Tests=test_col, Hosp=hosp_col, ICU=icu_col)) +
    theme_bw() +
    theme(legend.position = 'none')
  
  p2 <- p1 +   
    scale_y_log10() +
    scale_x_log10() +
    annotation_logticks(side='lb') +
    coord_cartesian(ylim=c(1,NA), xlim=c(1,NA))
  
  print((p1 + labs(y=paste(info_y, type, '(linear scale)'), x=paste(info_x, type, '(linear scale)') )) + 
        (p2 + labs(y=paste(info_y, type, '(log scale)'), x=paste(info_x, type, '(linear scale)') )))  
}

Global

subDT <- melt(unique(DT[, .(Date, gNew_Cases, gNew_Deaths, gTotal_Cases, gTotal_Deaths, gRoll_Cases, gRoll_Deaths)]),
                            id.vars="Date", variable.name="Type", value.name="Events")

p1 <- ggplot(subDT, aes(x=Date, y=Events, colour=Type, fill=Type)) +
    facet_grid( sub('^g', '', sub('_Cases', '', sub('_Deaths', '', subDT$Type))) ~ ., scales = 'free_y') +
    geom_line() +
    theme_minimal() + 
    labs(x='', y='') +
    theme(legend.position='none')

p2 <- ggplot(subDT, aes(x=Date, y=Events, colour=Type, fill=Type)) +
    facet_grid( sub('^g', '', sub('_Cases', '', sub('_Deaths', '', subDT$Type))) ~ ., scales = 'free_y') +
    geom_line() +
    scale_y_log10() +
    labs(x='', y='') +
    theme_minimal()

print( p1 + p2 )

subDT <- unique(DT[, .(Date, gRoll_Cases_Rate, gRoll_Deaths_Rate, gMortality)])

p3 <- ggplot(subDT, aes(x=Date, y=gRoll_Cases_Rate)) +
    geom_hline(yintercept = 1, size=0.1) +
  geom_point(colour=case_col, size=0.5) +
    geom_smooth(span=0.2, colour=case_col, size=0.5) +
    coord_cartesian(ylim=c(0.8, 1.2)) +
    labs(title=paste('Daily change rate of ', W, '-day rolling sum')) +
    theme_bw()

p4 <- ggplot(subDT, aes(x=Date, y=gRoll_Deaths_Rate)) +
    geom_hline(yintercept = 1, size=0.1) +
  geom_point(colour=death_col, size=0.5) +
    geom_smooth(span=0.2, colour=death_col, size=0.5) +
    coord_cartesian(ylim=c(0.8, 1.2)) +
    labs(title='') +
        theme_bw()

p5 <- ggplot(subDT, aes(x=Date, y=gMortality)) +
    geom_hline(yintercept = 0, size=0.1) +
  geom_point(colour=death_col, size=0.5) +
    geom_smooth(span=0.3, colour=death_col, size=0.5) +
    labs(title='') +
        theme_bw()

print( p3 / p4 / p5 )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Detailed

Countries of personal interest

More plots

for (i in topInterest) {
    more_plots(tidyDT, expDT, rateDT, i, homecountry)
  
  ggsave(paste0('~/covid/', i, '_more.png'), plot = last_plot(), scale = 1,
       width = 20, height = 40, units = "cm", dpi = 300, limitsize = TRUE)
}
## Austria
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Italy
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Greece
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Luxembourg
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Germany
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Fewer plots

for (i in topInterest) {
    fewer_plots(tidyDT[hsplit=='roll',], expDT, rateDT, i, homecountry)
  
  ggsave(paste0('~/covid/', i, '_fewer.png'), plot = last_plot(), scale = 1,
       width = 20, height = 40, units = "cm", dpi = 300, limitsize = TRUE)
}
## Austria
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Italy
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Greece
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Luxembourg
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Germany
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Comparative

Selected countries

countries <- c(topInterest, other_countries, neighbour_countries)

for (j in c('norm_roll_Cases', 'norm_roll_Deaths', 'norm_new_Tests')) {
    compare_numbers(tidyDT, j, countries)
  
  ggsave(paste0('~/covid/', j, '_comp.png'), plot = last_plot(), scale = 1,
       width = 30, height = 3 * length(countries), units = "cm", dpi = 300, limitsize = TRUE)
}

for (j in c('Cases', 'Deaths')) {
  compare_exp(expDT, j, 'norm_Total', 'norm_Roll', countries)
    
  ggsave(paste0('~/covid/', j, '_comp.png'), plot = last_plot(), scale = 1,
       width = 30, height = 3 * length(countries), units = "cm", dpi = 300, limitsize = TRUE)
  }

Session

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rgeos_0.5-5       maptools_1.0-2    rgdal_1.5-18      sp_1.4-4         
##  [5] broom_0.7.2       data.table_1.13.0 patchwork_1.0.1   ggrepel_0.8.2    
##  [9] gganimate_1.0.7   ggplot2_3.3.2     lubridate_1.7.9   httr_1.4.2       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5        pillar_1.4.6      compiler_4.0.3    prettyunits_1.1.1
##  [5] tools_4.0.3       progress_1.2.2    digest_0.6.25     nlme_3.1-149     
##  [9] lattice_0.20-41   evaluate_0.14     lifecycle_0.2.0   tibble_3.0.4     
## [13] gtable_0.3.0      mgcv_1.8-33       pkgconfig_2.0.3   rlang_0.4.8      
## [17] Matrix_1.2-18     yaml_2.2.1        xfun_0.18         withr_2.3.0      
## [21] stringr_1.4.0     dplyr_1.0.2       knitr_1.30        generics_0.0.2   
## [25] vctrs_0.3.4       hms_0.5.3         grid_4.0.3        tidyselect_1.1.0 
## [29] glue_1.4.2        R6_2.4.1          gifski_0.8.6      foreign_0.8-80   
## [33] rmarkdown_2.5     tidyr_1.1.2       farver_2.0.3      purrr_0.3.4      
## [37] tweenr_1.0.1      magrittr_1.5      splines_4.0.3     backports_1.1.10 
## [41] scales_1.1.1      ellipsis_0.3.1    htmltools_0.5.0   colorspace_1.4-1 
## [45] labeling_0.4.2    stringi_1.5.3     munsell_0.5.0     crayon_1.3.4